
***********CALCULATE MONTHLY S&P CONVEXITY*************
* Builds one row per calendar month from the daily index file and saves it as
* T1_SP_Convexity.dta with the variables month, year and convexity, where
*   convexity = (midpoint - avgprc) / midpoint
*   midpoint  = (first daily spindx of the month + last daily spindx) / 2
*   avgprc    = mean of the daily spindx values within the month
est clear

use "$jfqa_rep/CRSP_Daily_Index.dta", clear

qui{
drop if missing(spindx)

* first and last index level of each month, taken in day order within the
* (year, month) group rather than by comparing month with the adjacent row
bysort year month (day): gen prc_first=spindx[1]
by year month: gen prc_last=spindx[_N]

egen avgprc=mean(spindx), by(year month)

* the midpoint is kept inline so that it is evaluated in double precision
gen convexity=((prc_first+prc_last)/2-avgprc)/((prc_first+prc_last)/2)

* convexity is constant within a month, so any single row per month will do
bysort year month: keep if _n==_N

keep month year convexity
}

save "$jfqa_rep/T1_SP_Convexity.dta", replace


*****FORECAST PERIOD BEGINNING 1975*****
* Out-of-sample evaluation of one-month-ahead forecasts of log_prem.
* Months kept after the filters below are numbered consecutively in
* month_group; forecasts are produced for month_group first_fc to last_fc.
* Per the section title first_fc is taken to be January 1975 - TODO confirm
* against the merged data. Run the section as a whole so the locals are set.
local first_fc 140
local last_fc 713

qui{
import delimited "$jfqa_rep/Goyal_Excel_Monthly.csv", clear

rename index sp_index
rename d12 sp_div
rename e12 sp_earn

drop if missing(sp_index)

* force turns any non-numeric text into missing without raising an error
destring sp_index, force replace

merge 1:1 year month using "$jfqa_rep/CRSP_Monthly_Index.dta"
keep if _merge==3
drop _merge

gen monthyear=ym(year, month)
tsset monthyear
sort monthyear

* NOTE(review): crsp_spvw enters without a log transform while rf is logged;
* confirm that crsp_spvw is already a log return in the input files.
gen log_prem=(crsp_spvw-log(1+rf))*100

merge 1:1 year month using "$jfqa_rep/T1_SP_Convexity.dta"
keep if _merge==3
drop _merge
sort monthyear

order year month monthyear log_prem

*construct control variables
gen dp = log(sp_div/sp_index)
gen dy = log(sp_div/L.sp_index)
gen ep = log(sp_earn/sp_index)
gen de = log(sp_div/sp_earn)
* these predictors are used exactly as supplied; stop early if one is absent
confirm variable svar bm ntis tbl lty ltr infl
*gen beta_prem=csp
gen tms=lty-tbl
* NOTE(review): the default yield spread is commonly defined as baa-aaa; the
* sign is reversed here, which leaves the single-predictor OLS forecasts
* below unchanged. Confirm the intended convention.
gen dfy = aaa-baa
gen dfr=corpr-ltr

drop if year<1963
drop if year==1963 & month<6

gen prem_se=.
gen prem_fe=.
gen mean_log_prem=.

*lag variables (taken while the data are still tsset on monthyear)
foreach i in dp dy ep de svar bm ntis tbl lty infl ltr tms dfy dfr convexity log_prem {
    gen lag_`i'=L.`i'
}

*drop onset of covid
drop if year==2020 & month==3
drop if year==2020 & month==4

* renumber the remaining months without gaps so that L. steps over the
* dropped months
egen month_group=group(monthyear)
sort month_group
tsset month_group

* benchmark forecast: mean of log_prem over all periods before j
forval j=`first_fc'/`last_fc' {
    egen mean_prem=mean(log_prem) if month_group<`j'
    replace mean_log_prem=L.mean_prem if month_group==`j'
    replace prem_fe=log_prem-L.mean_prem if month_group==`j'
    replace prem_se=prem_fe^2 if month_group==`j'
    drop mean_prem
}
egen mse_n=mean(prem_se)
egen count_periods=count(year) if ~missing(prem_se)


****PANEL B****
* For each predictor: expanding-window OLS of log_prem on the lagged
* predictor using periods before j, then a forecast for period j.
foreach i in convexity log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen `i'_se=.
    gen `i'_fe=.
    gen `i'_dif=.
    gen yhat_`i'=.
    forval j=`first_fc'/`last_fc' {
        reg log_prem lag_`i' if month_group<`j'
        predict yhat if month_group==`j', xb
        replace yhat_`i'=yhat if month_group==`j'
        replace `i'_fe=log_prem-yhat if month_group==`j'
        replace `i'_se=`i'_fe^2 if month_group==`j'
        replace `i'_dif=mean_log_prem-yhat if month_group==`j'
        drop yhat
    }
    egen mse_`i'=mean(`i'_se)
    * out-of-sample R^2 relative to the benchmark mean forecast
    gen r_squared_`i'=1-(mse_`i'/mse_n)
    gen msfe_`i'=count_periods*(mse_n-mse_`i')/mse_`i'

    * adjusted loss differential: benchmark squared error minus (model squared
    * error minus squared forecast difference); its t-statistic comes from a
    * regression on a constant only
    gen cw_`i' = prem_fe^2-(`i'_fe^2-`i'_dif^2)
    reg cw_`i'
    gen cw_t_`i' = _b[_cons]/_se[_cons]
}


****PANEL C****
* keep forecast periods only
drop if missing(yhat_convexity)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen lambda_`i'=.

    * logistic reparameterisation: the two combination weights lie in (0,1)
    * and sum to one
    local ma2 (exp({b2})/(1+exp({b2})))
    local ma1 (1/(1+exp({b2})))

    nl (log_prem=`ma1'*yhat_convexity + `ma2'*yhat_`i'), delta(1e-7)

    local na2 exp(_b[b2:_cons])/(1+exp(_b[b2:_cons]))
    local na1 1/(1+exp(_b[b2:_cons]))

    * the enclosing quietly block suppresses this display and the nlcom table
    display "PREDICTOR `i'"
    nlcom (a1: `na1') (a2: `na2')

    * lambda is the weight on the convexity forecast when the two weights are
    * constrained to sum to one and there is no intercept
    constraint define 1 yhat_convexity + yhat_`i'=1
    cnsreg log_prem yhat_convexity yhat_`i', constraint(1) noconstant
    replace lambda_`i'=_b[yhat_convexity]
    * cap at one; missing compares as larger than any number, so exclude it
    replace lambda_`i'=1 if lambda_`i'>1 & !missing(lambda_`i')
}


*calculate HLN significance
drop count_periods
egen count_periods=count(year)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    * d = (error of predictor i - error of convexity) x error of predictor i
    gen d_`i'=(`i'_fe-convexity_fe)*`i'_fe

    egen d_bar_`i'=mean(d_`i')
    gen d_`i'_se=(d_`i'-d_bar_`i')^2

    * phi is the mean squared deviation of d; v_hat = phi divided by n
    egen phi_`i'=mean(d_`i'_se)
    gen v_hat_`i'=phi_`i'/count_periods

    gen mhln_`i' = ((count_periods-1)/count_periods)*(1/sqrt(v_hat_`i'))*d_bar_`i'

    drop d_`i' d_bar_`i' d_`i'_se v_hat_`i'
}

* every statistic is constant across rows, so keep the final period only
keep if month_group==`last_fc'
}

*R^2 and CW_t are for Panel B
*Lambda and MHLN are for Panel C
* NOTE(review): the one-row result set is only left in memory; nothing here
* saves or lists it, and the next section reloads the data with clear.
keep r_squared_* cw_t_* lambda_* mhln_*


*****FORECAST PERIOD BEGINNING 1980*****
* Out-of-sample evaluation of one-month-ahead forecasts of log_prem.
* Months kept after the filters below are numbered consecutively in
* month_group; forecasts are produced for month_group first_fc to last_fc.
* Per the section title first_fc is taken to be January 1980 - TODO confirm
* against the merged data. Run the section as a whole so the locals are set.
local first_fc 200
local last_fc 713

qui{
import delimited "$jfqa_rep/Goyal_Excel_Monthly.csv", clear

rename index sp_index
rename d12 sp_div
rename e12 sp_earn

drop if missing(sp_index)

* force turns any non-numeric text into missing without raising an error
destring sp_index, force replace

merge 1:1 year month using "$jfqa_rep/CRSP_Monthly_Index.dta"
keep if _merge==3
drop _merge

gen monthyear=ym(year, month)
tsset monthyear
sort monthyear

* NOTE(review): crsp_spvw enters without a log transform while rf is logged;
* confirm that crsp_spvw is already a log return in the input files.
gen log_prem=(crsp_spvw-log(1+rf))*100

merge 1:1 year month using "$jfqa_rep/T1_SP_Convexity.dta"
keep if _merge==3
drop _merge
sort monthyear

order year month monthyear log_prem

*construct control variables
gen dp = log(sp_div/sp_index)
gen dy = log(sp_div/L.sp_index)
gen ep = log(sp_earn/sp_index)
gen de = log(sp_div/sp_earn)
* these predictors are used exactly as supplied; stop early if one is absent
confirm variable svar bm ntis tbl lty ltr infl
*gen beta_prem=csp
gen tms=lty-tbl
* NOTE(review): the default yield spread is commonly defined as baa-aaa; the
* sign is reversed here, which leaves the single-predictor OLS forecasts
* below unchanged. Confirm the intended convention.
gen dfy = aaa-baa
gen dfr=corpr-ltr

drop if year<1963
drop if year==1963 & month<6

gen prem_se=.
gen prem_fe=.
gen mean_log_prem=.

*lag variables (taken while the data are still tsset on monthyear)
foreach i in dp dy ep de svar bm ntis tbl lty infl ltr tms dfy dfr convexity log_prem {
    gen lag_`i'=L.`i'
}

*drop onset of covid
drop if year==2020 & month==3
drop if year==2020 & month==4

* renumber the remaining months without gaps so that L. steps over the
* dropped months
egen month_group=group(monthyear)
sort month_group
tsset month_group

* benchmark forecast: mean of log_prem over all periods before j
forval j=`first_fc'/`last_fc' {
    egen mean_prem=mean(log_prem) if month_group<`j'
    replace mean_log_prem=L.mean_prem if month_group==`j'
    replace prem_fe=log_prem-L.mean_prem if month_group==`j'
    replace prem_se=prem_fe^2 if month_group==`j'
    drop mean_prem
}
egen mse_n=mean(prem_se)
egen count_periods=count(year) if ~missing(prem_se)

****PANEL B****
* For each predictor: expanding-window OLS of log_prem on the lagged
* predictor using periods before j, then a forecast for period j.
foreach i in convexity log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen `i'_se=.
    gen `i'_fe=.
    gen `i'_dif=.
    gen yhat_`i'=.
    forval j=`first_fc'/`last_fc' {
        reg log_prem lag_`i' if month_group<`j'
        predict yhat if month_group==`j', xb
        replace yhat_`i'=yhat if month_group==`j'
        replace `i'_fe=log_prem-yhat if month_group==`j'
        replace `i'_se=`i'_fe^2 if month_group==`j'
        replace `i'_dif=mean_log_prem-yhat if month_group==`j'
        drop yhat
    }
    egen mse_`i'=mean(`i'_se)
    * out-of-sample R^2 relative to the benchmark mean forecast
    gen r_squared_`i'=1-(mse_`i'/mse_n)
    gen msfe_`i'=count_periods*(mse_n-mse_`i')/mse_`i'

    * adjusted loss differential: benchmark squared error minus (model squared
    * error minus squared forecast difference); its t-statistic comes from a
    * regression on a constant only
    gen cw_`i' = prem_fe^2-(`i'_fe^2-`i'_dif^2)
    reg cw_`i'
    gen cw_t_`i' = _b[_cons]/_se[_cons]
}


****PANEL C****
* keep forecast periods only
drop if missing(yhat_convexity)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen lambda_`i'=.

    * logistic reparameterisation: the two combination weights lie in (0,1)
    * and sum to one
    local ma2 (exp({b2})/(1+exp({b2})))
    local ma1 (1/(1+exp({b2})))

    nl (log_prem=`ma1'*yhat_convexity + `ma2'*yhat_`i'), delta(1e-7)

    local na2 exp(_b[b2:_cons])/(1+exp(_b[b2:_cons]))
    local na1 1/(1+exp(_b[b2:_cons]))

    * the enclosing quietly block suppresses this display and the nlcom table
    display "PREDICTOR `i'"
    nlcom (a1: `na1') (a2: `na2')

    * lambda is the weight on the convexity forecast when the two weights are
    * constrained to sum to one and there is no intercept
    constraint define 1 yhat_convexity + yhat_`i'=1
    cnsreg log_prem yhat_convexity yhat_`i', constraint(1) noconstant
    replace lambda_`i'=_b[yhat_convexity]
    * cap at one; missing compares as larger than any number, so exclude it
    replace lambda_`i'=1 if lambda_`i'>1 & !missing(lambda_`i')
}


*calculate HLN significance
drop count_periods
egen count_periods=count(year)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    * d = (error of predictor i - error of convexity) x error of predictor i
    gen d_`i'=(`i'_fe-convexity_fe)*`i'_fe

    egen d_bar_`i'=mean(d_`i')
    gen d_`i'_se=(d_`i'-d_bar_`i')^2

    * phi is the mean squared deviation of d; v_hat = phi divided by n
    egen phi_`i'=mean(d_`i'_se)
    gen v_hat_`i'=phi_`i'/count_periods

    gen mhln_`i' = ((count_periods-1)/count_periods)*(1/sqrt(v_hat_`i'))*d_bar_`i'

    drop d_`i' d_bar_`i' d_`i'_se v_hat_`i'
}

* every statistic is constant across rows, so keep the final period only
keep if month_group==`last_fc'
}

*R^2 and CW_t are for Panel B
*Lambda and MHLN are for Panel C
* NOTE(review): the one-row result set is only left in memory; nothing here
* saves or lists it, and the next section reloads the data with clear.
keep r_squared_* cw_t_* lambda_* mhln_*


*****FORECAST PERIOD BEGINNING 1985*****
* Out-of-sample evaluation of one-month-ahead forecasts of log_prem.
* Months kept after the filters below are numbered consecutively in
* month_group; forecasts are produced for month_group first_fc to last_fc.
* Per the section title first_fc is taken to be January 1985 - TODO confirm
* against the merged data. Run the section as a whole so the locals are set.
local first_fc 260
local last_fc 713

qui{
import delimited "$jfqa_rep/Goyal_Excel_Monthly.csv", clear

rename index sp_index
rename d12 sp_div
rename e12 sp_earn

drop if missing(sp_index)

* force turns any non-numeric text into missing without raising an error
destring sp_index, force replace

merge 1:1 year month using "$jfqa_rep/CRSP_Monthly_Index.dta"
keep if _merge==3
drop _merge

gen monthyear=ym(year, month)
tsset monthyear
sort monthyear

* NOTE(review): crsp_spvw enters without a log transform while rf is logged;
* confirm that crsp_spvw is already a log return in the input files.
gen log_prem=(crsp_spvw-log(1+rf))*100

merge 1:1 year month using "$jfqa_rep/T1_SP_Convexity.dta"
keep if _merge==3
drop _merge
sort monthyear

* the intermediate convexity file is deleted here; any section that merges
* it cannot be re-run afterwards until the file is generated again
rm "$jfqa_rep/T1_SP_Convexity.dta"

order year month monthyear log_prem

*construct control variables
gen dp = log(sp_div/sp_index)
gen dy = log(sp_div/L.sp_index)
gen ep = log(sp_earn/sp_index)
gen de = log(sp_div/sp_earn)
* these predictors are used exactly as supplied; stop early if one is absent
confirm variable svar bm ntis tbl lty ltr infl
*gen beta_prem=csp
gen tms=lty-tbl
* NOTE(review): the default yield spread is commonly defined as baa-aaa; the
* sign is reversed here, which leaves the single-predictor OLS forecasts
* below unchanged. Confirm the intended convention.
gen dfy = aaa-baa
gen dfr=corpr-ltr

drop if year<1963
drop if year==1963 & month<6

gen prem_se=.
gen prem_fe=.
gen mean_log_prem=.

*lag variables (taken while the data are still tsset on monthyear)
foreach i in dp dy ep de svar bm ntis tbl lty infl ltr tms dfy dfr convexity log_prem {
    gen lag_`i'=L.`i'
}

*drop onset of covid
drop if year==2020 & month==3
drop if year==2020 & month==4

* renumber the remaining months without gaps so that L. steps over the
* dropped months
egen month_group=group(monthyear)
sort month_group
tsset month_group


* benchmark forecast: mean of log_prem over all periods before j
forval j=`first_fc'/`last_fc' {
    egen mean_prem=mean(log_prem) if month_group<`j'
    replace mean_log_prem=L.mean_prem if month_group==`j'
    replace prem_fe=log_prem-L.mean_prem if month_group==`j'
    replace prem_se=prem_fe^2 if month_group==`j'
    drop mean_prem
}
egen mse_n=mean(prem_se)
egen count_periods=count(year) if ~missing(prem_se)


****PANEL B****
* For each predictor: expanding-window OLS of log_prem on the lagged
* predictor using periods before j, then a forecast for period j.
foreach i in convexity log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen `i'_se=.
    gen `i'_fe=.
    gen `i'_dif=.
    gen yhat_`i'=.
    forval j=`first_fc'/`last_fc' {
        reg log_prem lag_`i' if month_group<`j'
        predict yhat if month_group==`j', xb
        replace yhat_`i'=yhat if month_group==`j'
        replace `i'_fe=log_prem-yhat if month_group==`j'
        replace `i'_se=`i'_fe^2 if month_group==`j'
        replace `i'_dif=mean_log_prem-yhat if month_group==`j'
        drop yhat
    }
    egen mse_`i'=mean(`i'_se)
    * out-of-sample R^2 relative to the benchmark mean forecast
    gen r_squared_`i'=1-(mse_`i'/mse_n)
    gen msfe_`i'=count_periods*(mse_n-mse_`i')/mse_`i'

    * adjusted loss differential: benchmark squared error minus (model squared
    * error minus squared forecast difference); its t-statistic comes from a
    * regression on a constant only
    gen cw_`i' = prem_fe^2-(`i'_fe^2-`i'_dif^2)
    reg cw_`i'
    gen cw_t_`i' = _b[_cons]/_se[_cons]
}


****PANEL C****
* keep forecast periods only
drop if missing(yhat_convexity)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    gen lambda_`i'=.

    * logistic reparameterisation: the two combination weights lie in (0,1)
    * and sum to one
    local ma2 (exp({b2})/(1+exp({b2})))
    local ma1 (1/(1+exp({b2})))

    nl (log_prem=`ma1'*yhat_convexity + `ma2'*yhat_`i'), delta(1e-7)

    local na2 exp(_b[b2:_cons])/(1+exp(_b[b2:_cons]))
    local na1 1/(1+exp(_b[b2:_cons]))

    * the enclosing quietly block suppresses this display and the nlcom table
    display "PREDICTOR `i'"
    nlcom (a1: `na1') (a2: `na2')

    * lambda is the weight on the convexity forecast when the two weights are
    * constrained to sum to one and there is no intercept
    constraint define 1 yhat_convexity + yhat_`i'=1
    cnsreg log_prem yhat_convexity yhat_`i', constraint(1) noconstant
    replace lambda_`i'=_b[yhat_convexity]
    * cap at one; missing compares as larger than any number, so exclude it
    replace lambda_`i'=1 if lambda_`i'>1 & !missing(lambda_`i')
}


*calculate HLN significance
drop count_periods
egen count_periods=count(year)

foreach i in log_prem  dp dy ep de bm svar ntis tbl lty ltr tms dfy dfr infl {
    * d = (error of predictor i - error of convexity) x error of predictor i
    gen d_`i'=(`i'_fe-convexity_fe)*`i'_fe

    egen d_bar_`i'=mean(d_`i')
    gen d_`i'_se=(d_`i'-d_bar_`i')^2

    * phi is the mean squared deviation of d; v_hat = phi divided by n
    egen phi_`i'=mean(d_`i'_se)
    gen v_hat_`i'=phi_`i'/count_periods

    gen mhln_`i' = ((count_periods-1)/count_periods)*(1/sqrt(v_hat_`i'))*d_bar_`i'

    drop d_`i' d_bar_`i' d_`i'_se v_hat_`i'
}

* every statistic is constant across rows, so keep the final period only
keep if month_group==`last_fc'
}

*R^2 and CW_t are for Panel B
*Lambda and MHLN are for Panel C
* NOTE(review): the one-row result set is only left in memory; nothing in
* the visible code saves or lists it.
keep r_squared_* cw_t_* lambda_* mhln_*
